#!/bin/sh 
# This script converts the original document-term file into a sequential documentId-termId file.
# Only src_file and dst_file are mandatory; the usage text marks the two map
# files as optional. A usage error is reported on stderr with a non-zero
# status so that callers can detect the failed invocation.
if [ "$#" -lt 2 ]; then
    echo "usage: transferId.sh src_file dst_file [term_map_file doc_map_file]" >&2
    exit 1
fi

# Mandatory positional arguments: input file and output file.
srcFile=$1
dstFile=$2

# Optional map files. ${N-word} substitutes the default only when the
# argument was not supplied at all; an explicitly passed empty string is kept.
termIdMapFile="${3-../data/lsa/term_id.map}"
documentIdMapFile="${4-../data/lsa/document_id.map}"

# Translate original document/term keys into their mapped ids and write one
# "documentId termId weight" line per mapped term to $dstFile.
#
# awk reads three tab-separated inputs, in this order:
#   1. $termIdMapFile     - column 1: original term key, column 2: mapped id
#   2. $documentIdMapFile - column 1: original document key, column 2: mapped id
#   3. $srcFile           - column 1: document key,
#                           column 2: blank-separated "term@weight" tokens
#
# Side effect: a source line containing a token whose weight is missing or
# zero is appended verbatim to test.txt in the current directory, and the
# remaining tokens of that line are skipped.
#
# All path expansions are quoted so that paths containing whitespace or glob
# characters reach awk (and the redirection) as single words.
awk '
BEGIN {
    FS = "\t"
}
# First input: build the term key -> term id lookup table.
FILENAME == ARGV[1] {
    selectTerm[$1] = $2
}
# Second input: build the document key -> document id lookup table.
FILENAME == ARGV[2] {
    selectDocument[$1] = $2
}
# Third input: map every term@weight token of the document.
FILENAME == ARGV[3] {
    # NOTE(review): a document key missing from the map gives an empty
    # documentId and its rows are still printed - confirm this is intended.
    documentId = selectDocument[$1]

    # A single-space separator splits on runs of blanks, so no token is empty.
    tokenCount = split($2, tokens, " ")

    # Iterate by position: the order of a for-in loop is unspecified, and the
    # early break below would make the output depend on that order.
    for (i = 1; i <= tokenCount; i++) {
        split(tokens[i], termArray, "@")
        termId = selectTerm[termArray[1]]
        # Terms without a mapping (empty or zero id) are dropped.
        if (termId) {
            print documentId, termId, termArray[2]
        }
        # Token without a usable weight: record the raw line and stop
        # processing the rest of this document.
        if (!termArray[2]) {
            print $0 >> "test.txt"
            break
        }
    }
}' "$termIdMapFile" "$documentIdMapFile" "$srcFile" > "$dstFile"
